/*
Create table 4 using BRFSS
*/

libname brfss "D:\Data\Behavioral Risk Factor Surveillance System";
OPTIONS obs = max compress = yes NOFMTERR;

*Extract and recode from BRFSS;
%macro brfss(year,flu1,flu2,pneu,hplan,weight,binge,days_drink,drink_any,doctor,no_care_cost);
data brfss&year.;
   set brfss.brfss_&year. (keep = AGE &flu1. &flu2. &pneu. &hplan. &weight. &binge. &days_drink. &drink_any. &doctor. &no_care_cost.
                           IMONTH IDAY IYEAR SEX _STATE GENHLTH MEDCOST EDUCA RACE2 EMPLOY HLTHPLAN _PSU _STSTR);
   format surv_d surv_month date.;
  *Change in insurance coverage;
   if HLTHPLAN = 1 then insurance = 1; 
   if HLTHPLAN = 2 then insurance = 0;
  *Flu stuff;
   if &flu1. = 1 or &flu2. = 1 then flu_shot = 1; else flu_shot = 0;
   if &flu1. not in (1,2) then flu_shot = .;*Pre 2000 this is part of a module before this there are refulsals and dont knows;
   if &pneu. = 1 then PNEUMVAC_vac = 1; else PNEUMVAC_vac = 0;
   if &pneu. not in (1,2) then PNEUMVAC_vac = .;
   surv_d = mdy(IMONTH,IDAY,IYEAR);
   surv_month = mdy(IMONTH,15,IYEAR);  
  *Change in insurance coverage;
   if &hplan. = 1 then insurance = 1; 
   if &hplan. = 2 then insurance = 0; 
  *Weights;
   weight = &weight.;
  *Self reported health;
   if GENHLTH = 1 and GENHLTH not in (7,9,.) then health_excellent = 1; else health_excellent = 0;
   if GENHLTH in (1,2) and GENHLTH not in (7,9,.) then health_Vgood_ob = 1; else health_Vgood_ob = 0;
   if GENHLTH in (1,2,3) and GENHLTH not in (7,9,.) then health_good_ob = 1; else health_good_ob = 0;
   if GENHLTH in (1,2,3,4) and GENHLTH not in (7,9,.) then fair_ob = 1; else fair_ob = 0;
  *Drank in last 30 days;
   if &drink_any. = 1 then drank30 = 1; 
   if &drink_any. = 2 then drank30 = 0;
  *Days drinking in last 30;
   if &days_drink. >= 101 and &days_drink. <= 199 then days_drinking = (&days_drink.-100)*4.345238; *Report per week;
   if &days_drink. >= 201 and &days_drink. <= 299 then days_drinking = &days_drink.-200; *Report per month;
   if &days_drink. = 888 then days_drinking = 0;
   if &drink_any. = 2 then days_drinking = 0; *Because they are not asked how many drinks they had if they report no drinks in last 30 days; 
  *Binge drinking - DRNK3GE5 is 5 for man and 4 for woman, DRNK2GE5 is 5 for everyone - At least a few people have multiple 
   binge drinking events on some days as there are a few people reporting numbers over 30;
   if &binge. >= 1 and &binge. <= 76 then binge_days =  &binge. ;
   if &binge. = 88 then  binge_days = 0;
   if &drink_any. = 2 then binge_days = 0; *Because they are not asked how many drinks they had if they report no drinks in last 30 days; 
   if &days_drink. = 888 then binge_days = 0; *Second opportunity to say no drinking in last 30 - In some file years??;
  *Any binge drinking in past month;
   if &binge. >= 1 and &binge. <= 76 then binge_l30 =  1 ;
   if &binge. = 88 then  binge_l30 = 0;
   if &drink_any. = 2 then binge_l30 = 0; *Because they are not asked how many drinks they had if they report no drinks in last 30 days; 
   if &days_drink. = 888 then binge_l30 = 0; *Second opportunity to say no drinking in last 30;
  *Education level;
   if EDUCA = 3 then some_HS = 1; else some_HS = 0;
   if EDUCA = 4 then HS = 1; else HS = 0;
   if EDUCA in (5,6) then college = 1; else college = 0;
   *Race;
    if RACE2 = 1 then white = 1; else white = 0;
	if RACE2 = 2 then black = 1; else black = 0;
	if RACE2 = 8 then Hispanic = 1; else Hispanic = 0;
  *Employment status;
   if EMPLOY in (1,2) then working = 1; 
   if EMPLOY in (3,4,5,6,7,8) then working = 0;
  *Have a Doctor;
   if &doctor. in (1,2) then have_doctor = 1;  
   if &doctor. = 3 then have_doctor = 0;  
  *No care due to cost;
   if &no_care_cost. = 1 then no_care_cost = 1;
   if &no_care_cost. = 2 then no_care_cost = 0;
run;
%mend;

%brfss(2010,FLUSHOT4,FLUSPRY3,PNEUVAC3,HLTHPLAN,_FINALWT,DRNK3GE5,ALCDAY4,DRNKANY4,PERSDOC2,MEDCOST);
%brfss(2009,FLUSHOT3,FLUSPRY2,PNEUVAC3,HLTHPLAN,_FINALWT,DRNK3GE5,ALCDAY4,DRNKANY4,PERSDOC2,MEDCOST);
%brfss(2008,FLUSHOT3,FLUSPRY2,PNEUVAC3,HLTHPLAN,_FINALWT,DRNK3GE5,ALCDAY4,DRNKANY4,PERSDOC2,MEDCOST);
%brfss(2007,FLUSHOT3,FLUSPRY2,PNEUVAC3,HLTHPLAN,_FINALWT,DRNK3GE5,ALCDAY4,DRNKANY4,PERSDOC2,MEDCOST);
%brfss(2006,FLUSHOT3,FLUSPRY2,PNEUVAC3,HLTHPLAN,_FINALWT,DRNK3GE5,ALCDAY4,DRNKANY4,PERSDOC2,MEDCOST);
%brfss(2005,FLUSHOT3,FLUSPRY2,PNEUVAC3,HLTHPLAN,_FINALWT,DRNK2GE5,ALCDAY4,DRNKANY4,PERSDOC2,MEDCOST);
%brfss(2004,FLUSHOT2,FLUSPRAY,PNEUVAC2,HLTHPLAN,_FINALWT,DRNK2GE5,ALCDAY3,DRNKANY3,PERSDOC2,MEDCOST);


*Take a look at the data;
data brfss;
   set brfss2004 brfss2005 brfss2006 brfss2007 brfss2008 brfss2009 brfss2010 ; 
   year = year(surv_month);
   days_drinking_if_drank = days_drinking;
   if days_drinking = 0 then days_drinking_if_drank = .;
   if _STATE in (4,34,55) then ED_group = 1; else ED_group = 0;
   if _STATE in (4,36,48,55) then inp_group = 1; else inp_group = 0;
  *Age groups for survey reg;
   if age in (19,20,21,22) then age_19_22 = 1; else age_19_22 = 0;
   if age in (19,20) then age_19_20 = 1; else age_19_20 = 0;
   if age in (21,22) then age_21_22 = 1; else age_21_22 = 0;
   count = 1;
run;

*Check stability;
proc means data = brfss  ;
   var flu_shot health_excellent health_Vgood_ob health_good_ob fair_ob drank30 days_drinking days_drinking_if_drank binge_l30 binge_days white black hispanic some_HS HS college working insurance have_doctor no_care_cost;
   output out = stability (where=(_type_ = 1 and _stat_ = 'MEAN'));
   weight weight; 
   class year;
 run;


%macro pull_m(var,domain_v);
*No need to include strata and cluster for the survey means in this context as I am not interested in the SE. I am getting 
 contrast from regression below so use proc means as it is much much faster;
*All states;
proc means data = brfss  noprint;
   var &var.;
   weight weight; 
   where  &domain_v. = 1;
   output out = all_states (where=(_STAT_ = 'MEAN') rename = (&var. = all_states));
run;

*ED states;
proc means data = brfss  noprint;
   var &var.;
   weight weight; 
   where  &domain_v. = 1 and ED_group = 1;
   output out = ed_states (where=(_STAT_ = 'MEAN') rename = (&var. = ed_states));
run;

*Inpatient states;
proc means data = brfss  noprint;
   var &var.;
   weight weight; 
   where  &domain_v. = 1 and inp_group = 1;
   output out = inp_states (where=(_STAT_ = 'MEAN') rename = (&var. = inp_states));
run;

*Get p-value of difference between ED and overall;
proc surveyreg data = brfss;
   model &var. = ED_group;
   domain  &domain_v.;
   weight weight; 
   Strata _STSTR; 
   Cluster _PSU; 
   ods output ParameterEstimates = ED_group_pval;
run;

*Get p-value of difference;
proc surveyreg data = brfss;
   model &var. = inp_group;
   domain  &domain_v.;
   weight weight;
   Strata _STSTR; 
   Cluster _PSU;  
   ods output ParameterEstimates = inp_group_pval;
run;

data  &var._&domain_v. (drop = &domain_v. Parameter);
   format label_v $25.;
   merge all_states (keep = all_states)
         ed_states  (keep = ed_states)
         ED_group_pval (where=(&domain_v.= 1 and Parameter = "ED_group") rename = (  Probt = prob_ed_diff)  keep = &domain_v. Probt    Parameter )
         inp_states  (keep = inp_states)
         inp_group_pval (where=(&domain_v.= 1 and Parameter = "inp_group") rename = (  Probt = prob_inp_diff)  keep = &domain_v. Probt    Parameter );
   label_v = "&var.";
run;

proc datasets; 
   delete all_states ed_states ED_group_pval inp_states inp_group_pval;
run;

%mend;
 
%pull_m(health_excellent,age_19_22); 
%pull_m(health_Vgood_ob,age_19_22); 
%pull_m(health_good_ob,age_19_22); 
%pull_m(fair_ob,age_19_22); 

%pull_m(white,age_19_22);
%pull_m(black,age_19_22);
%pull_m(Hispanic,age_19_22);
%pull_m(some_HS,age_19_22);
%pull_m(HS,age_19_22);
%pull_m(college,age_19_22);
%pull_m(working,age_19_22);
%pull_m(insurance,age_19_22);

%pull_m(drank30,age_19_22); 
%pull_m(drank30,age_19_20); 
%pull_m(drank30,age_21_22); 

%pull_m(days_drinking,age_19_22); 
%pull_m(days_drinking,age_19_20); 
%pull_m(days_drinking,age_21_22); 

%pull_m(days_drinking_if_drank,age_19_22); 
%pull_m(days_drinking_if_drank,age_19_20); 
%pull_m(days_drinking_if_drank,age_21_22); 

%pull_m(binge_l30,age_19_22); 
%pull_m(binge_l30,age_19_20); 
%pull_m(binge_l30,age_21_22); 

%pull_m(binge_days,age_19_22); 
%pull_m(binge_days,age_19_20); 
%pull_m(binge_days,age_21_22); 

%pull_m(have_doctor,age_19_22); 
%pull_m(no_care_cost,age_19_22);  



*libname code "C:\Research\Alcohol and Morbidity\4. Code up from scratch";
*Create a count variable;
data  table1 (drop = parameter);
   format label_v  $50. all_states best. ed_states best.   prob_ed_diff PVALUE6.3 inp_states best. prob_inp_diff   PVALUE6.3;
   set white_age_19_22 black_age_19_22 hispanic_age_19_22 
       drank30_age_19_22 drank30_age_19_20 drank30_age_21_22
	   days_drinking_age_19_22  days_drinking_age_19_20 days_drinking_age_21_22
	   days_drinking_if_drank_age_19_22  days_drinking_if_drank_age_19_20  days_drinking_if_drank_age_21_22
	   binge_l30_age_19_22  binge_l30_age_19_20  binge_l30_age_21_22
	   binge_days_age_19_22  binge_days_age_19_20  binge_days_age_21_22
       health_excellent_age_19_22 health_Vgood_ob_age_19_22 health_good_ob_age_19_22 fair_ob_age_19_22 
       some_HS_age_19_22 HS_age_19_22 college_age_19_22 
       working_age_19_22 insurance_age_19_22 have_doctor_age_19_22 no_care_cost_age_19_22;
  *Convert proportion into percent to make them more readable;
   if label_v not in ("days_drinking","days_drinking_if_drank","binge_days") then do;
      all_states = round(100*all_states,.1);
	  ed_states = round(100*ed_states,.1);
	  inp_states = round(100*inp_states,.1);
   end;   
   if label_v in ("days_drinking","days_drinking_if_drank","binge_days") then do;
      all_states = round(all_states,.01);
	  ed_states = round(ed_states,.01);
	  inp_states = round(inp_states,.01);
   end;  
  *Create nice labels;
   if label_v = "white" then label_v = "White";
   if label_v = "black" then label_v = "Black";
   if label_v = "Hispanic" then label_v = "Hispanic";
   if label_v = "drank30" then label_v = "Drank in Last 30 Days";  
   if label_v = "days_drinking" then label_v = "Days Drinking in Last 30";  
   if label_v = "days_drinking_if_drank" then label_v = "Days Drinking in Last 30 if Drank";
   if label_v = "binge_l30" then label_v = "Binged in Last 30 Days";
   if label_v = "binge_days" then label_v = "Days Binge Drinking in Last 30";
   if label_v = "health_excellent" then label_v = "Health Excellent";
   if label_v = "health_Vgood_ob" then label_v = "Very good or Better";
   if label_v = "health_good_ob" then label_v = "Good or Better";
   if label_v = "fair_ob" then label_v = "Fair or Better";
   if label_v = "some_HS" then label_v = "Some High School";  
   if label_v = "HS" then label_v = "High School Graduate or GED";
   if label_v = "college" then label_v = "Some College";  
   if label_v = "working" then label_v = "Working";  
   if label_v = "insurance" then label_v = "Health Insurance";  
   if label_v = "have_doctor" then label_v = "Have a Doctor";  
   if label_v = "no_care_cost" then label_v = "No Care Due to Cost";  
run;
*Get sample counts - enter these by hand;
proc means data = brfss sum ;
   var count ED_group inp_group;
   where age in (19,20,21,22);
run;
proc means data = brfss sum ;
   var count ED_group inp_group;
   where age in (19,20);
run;
proc means data = brfss sum ;
   var count ED_group inp_group;
   where age in (21,22);
run;

proc export 
   data=table1 
   outfile="C:\Research\Alcohol and Morbidity\7. Code Archive\Code for Figures and Tables in Paper\Table 4 Raw.xlsx" 
   dbms = xlsx replace;
run;














